########################################################################################################################## GEOGRAPHIC MAPS IN R ############################################################################################################################# OUTLINE ### 1. National and Subnational Maps Using Tigris# 2. World Maps Using Rnaturalearth# 3. National and Subnational Maps Using Tigris and Tidycensus for Analyzing Census Data# Install these packages (if you haven't already):# install.packages("tigris")# install.packages("tidycensus")# install.packages("rnaturalearth")# install.packages("rnaturalearthdata") #needed to run rnaturalearth# install.packages("ggthemes")# install.packages("tidycensus")# install.packages("ggrepel")# install.packages("socviz")# install.packages("mapview")library(tidyverse)library(tigris)library(rnaturalearth)library(tidycensus)library(ggthemes)library(socviz)library(scales)library(haven)library(ggrepel)library(mapview)############################################################################################################ 1. National and Subnational Maps Using Tigris ################################################################################################################ Go-to source for tigris: Analyzing US Census Data: Methods, Maps, and Models in R, # by Kyle Walker. https://walker-data.com/census-r/index.html# See in particular Ch. 5 for basic information on tigris# This book is the same go-to source in Part 2 on "tidycensus"# Q: What is a CHOROPLETH MAP? # Start simple: Use tigris to get national map with states (take out PR)st <-states(cb =TRUE, resolution ="20m", progress_bar=FALSE) |>filter(NAME !="Puerto Rico") # View# Check "class" of this data/maps object; you'll see it's "special features" (sf) and # a data frame sf = contains geographical information for mapping.class(st)
[1] "sf" "data.frame"
# You can sort by state name if you want. This is not necessary, however.st <-arrange(st, NAME)# Quick map (notice how much simpler the syntax is using "sf" method)ggplot(st) +geom_sf()
# What happened?! :-) # Add "shift_geometry" at the end; gives better look, and puts AK and HI on bottomst <-states(cb =TRUE, resolution ="20m", progress_bar=FALSE) |>filter(NAME !="Puerto Rico") |>shift_geometry()# Quick map: Geometry here is "sf", which means ggplot recognizes this is a # "special features" geographic maps object.ggplot(st) +geom_sf()
# Change colors: Check out the color palette file in the Week 4 data session folderggplot(st) +geom_sf(fill="dodgerblue") +theme_void()
# Change state border line colorsggplot(st) +geom_sf(fill="dodgerblue", color="white") +theme_void()
# Change state border line width - thickggplot(st) +geom_sf(fill="dodgerblue", color="white", linewidth=.8) +theme_void()
# Change state border line width - thinggplot(st) +geom_sf(fill="dodgerblue", color="white", linewidth=.1) +theme_void()
# Let's start with a very simple choropleth map - 2020 election, Trump v. Biden# Import 2020 state election datav2020 <-read_csv("us_vote_2020.csv")# View# Our task in generating a choropleth map: # We need to merge our election data (v2020) with our "sf" maps object, "st"# See Ch. 3 in ModernDive, section 3.7 (using the "join" function) # First, match state names across datasets; use the "mutate" function, which generates# a new variable in the data object. This sets up for the merge, which requires you to# have **one** identification variable in common between the two datasets. In this case,# that's going to be "NAME."v2020 <- v2020 |>mutate(NAME=state)# Note how I'm using piping; the above command would be identical to: # v2020 <- mutate(v2020, NAME=state)# Remember, when you use piping, you call on the dataset before the pipe operator# and then in subsequent commands, you don't need to call in the data.# Merge our v2020 data object with our st object containing geographic information using# "left_join." Notice the syntax there. Put the "master" data object first (i.e., st) and # the data object that you're merging into the master object second.# "left_join" automatically merges by the variable that is common between the two# datasets. Again, in this example, that's "NAME."stv20 <-left_join(st, v2020)# Map using "theme_void"ggplot(stv20, aes(fill = called)) +geom_sf(color ="gray90", linewidth=.2) +scale_fill_manual(values =c("blue", "red"), labels=c("Biden", "Trump")) +theme_void() +coord_sf(expand=FALSE) +labs(fill ="",title =" 2020 US presidential election results by state",caption ="Note: Nebraska and Maine split electoral college votes by congressional district") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =8))
# Labelsggplot(stv20, aes(fill = called)) +geom_sf(color ="gray90", linewidth=.2) +scale_fill_manual(values =c("blue", "red"), labels=c("Biden", "Trump")) +theme_void() +coord_sf(expand=FALSE) +geom_text(aes(label = STUSPS, geometry = geometry),stat ="sf_coordinates", size=2, color="white") +labs(fill ="",title =" 2020 US presidential election results by state",caption ="Note: Nebraska and Maine split electoral college votes by congressional district") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =8))
# Using "theme_map" (from ggthemes), center title, make a little bigger (expand=FALSE)ggplot(stv20, aes(fill = called)) +geom_sf(color ="gray90", linewidth=.2) +scale_fill_manual(values =c("blue", "red"), labels=c("Biden", "Trump")) +theme_map() +coord_sf(expand=FALSE) +labs(fill ="",title =" 2020 US presidential election results by state",caption ="Note: Nebraska and Maine split electoral college votes by congressional district") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =8))
# Now let's produce a national map at the **counties** level# First, tell tigris to give us a geographic object at the county levelco <-counties(cb=TRUE, resolution="20m", year="2021", progress_bar=FALSE) |>filter(STATEFP !=72) |>shift_geometry()# How many counties are there in the U.S.?# Quick view: ggplot(data=co) +geom_sf(linewidth=.2, fill="white") +theme_void()
# We can also overlay this plot with state line borders. Just add another geom_sf commandggplot(data=co) +geom_sf(linewidth=.2, fill="white") +geom_sf(data=st, linewidth=.4, color="black", fill=NA) +theme_void()
# Read in county-level data from Healy's book (from socviz package)codata <- county_data# Match id variables across datasets. We'll match "id" in codata with "GEOID" in tigris# object.codata <- codata |>mutate(GEOID=id)# Merge Healy co. data with geographic maps data from Tigris; will match on GEOIDcomerge <-left_join(co, codata)# Choropleth map of household income at county level. # Note use of piping preceding ggplot# Note: label=dollar is from the "scales" package# Theme map instead, center title, zooom in a little (expand=FALSE)comerge |>ggplot(aes(fill = hh_income)) +geom_sf(color ="gray90", linewidth=.05) +scale_fill_distiller(palette ="Blues", labels=dollar) +theme_void() +coord_sf(expand=FALSE) +labs(fill ="Household Income",title ="Household Income by County",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# Reverse legend scale; make high values of income darker colorscomerge |>ggplot(aes(fill = hh_income)) +geom_sf(color ="gray90", size=.05) +scale_fill_distiller(palette ="Blues", direction =1, labels=dollar) +theme_void() +coord_sf(expand=FALSE) +labs(fill ="Household Income",title ="Household Income by County",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =8))
# Overlay state lines on county map; change coloring to yellow-green-blue palette# Experiment with different palettes and different border colors, etc.comerge |>ggplot() +geom_sf(data=comerge, aes(fill = hh_income), color ="gray70", size=.05) +geom_sf(data=st, fill=NA, color="black", linewidth=.1) +theme_void(base_size=16) +coord_sf(expand=FALSE) +scale_fill_distiller(palette ="YlGnBu", direction =1, labels=dollar) +labs(fill ="Household Income",title ="Household Income by County",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size =10),legend.text =element_text(size =8))
# Experiment with different palettes and different border colors, etc. Diverging colorscomerge |>ggplot() +geom_sf(data=comerge, aes(fill = hh_income), color ="gray70", size=.05) +geom_sf(data=st, fill=NA, color="black", linewidth=.1) +theme_void(base_size=16) +coord_sf(expand=FALSE) +scale_fill_distiller(palette ="RdYlBu", direction =1, labels=dollar) +labs(fill ="Household Income",title ="Household Income by County",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size =10),legend.text =element_text(size =8))
# Experiment with different palettes and different border colors, etc.: # "scale_gradient2" to define midpointcomerge |>ggplot() +geom_sf(data=comerge, aes(fill = hh_income), color ="gray70", size=.05) +geom_sf(data=st, fill=NA, color="black", linewidth=.1) +theme_void(base_size=16) +coord_sf(expand=FALSE) +scale_fill_gradient2(low ="red3", mid ="white", high ="dodgerblue2", midpoint =60000,labels=dollar) +labs(fill ="Household Income",title ="Household Income by County",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size =10),legend.text =element_text(size =8))
#################################################################################################################### 2. World Maps Using Rnaturalearth #################################################################################################################### Bring in world map from rnaturalearthw <-ne_countries(scale ="medium", returnclass ="sf")# Check "class" of "w." class(w)
[1] "sf" "data.frame"
#Note "sf" = simple features# Sort by type and country namew <-arrange(w, type, name)# View# Basic map using defaults; choropleth map for "income_grp" (categorical) variable# in our "w" object.w |>ggplot() +geom_sf(aes(fill=income_grp)) +scale_fill_brewer(palette ="YlGnBu")
# Change direction of scale - higher values of income darkerw |>ggplot() +geom_sf(aes(fill=income_grp)) +scale_fill_brewer(palette ="YlGnBu", direction=-1)
# Remove Antarctica and others; remove gray background, relabel legend, theme mapw |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(aes(fill=income_grp), color="black", linewidth=.1) +scale_fill_brewer(palette ="YlGnBu", direction=-1, labels=c("High OECD", "High non-OECD", "Upper middle", "Lower middle", "Low")) +coord_sf(expand=FALSE) +labs(fill ="Income Level", title ="World Map, Country Income Levels",caption="Source: R Natural Earth Data") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# You can also zoom in using latitudes and longitudes. # Projections! Default is crs = 4326; you can change projections# See: https://semba-blog.netlify.app/01/26/2020/world-map-and-map-projections/# For coordinates for specific projections, see: https://epsg.io/# We're going to use Robinson projection for world maps in this cass: ESRI:54030.# see NY Times...w |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(aes(fill=income_grp), color="black", linewidth=.1) +scale_fill_brewer(palette ="YlGnBu", direction=-1, labels=c("High OECD", "High non-OECD", "Upper middle", "Lower middle", "Low")) +coord_sf(crs='ESRI:54030', expand=FALSE) +labs(fill ="Income Level", title ="World Map, Country Income Levels",caption="Source: R Natural Earth Data") +theme_void() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# Theme mapw |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(aes(fill=income_grp), color="black", linewidth=.1) +scale_fill_brewer(palette ="YlGnBu", direction=-1, labels=c("High OECD", "High non-OECD", "Upper middle", "Lower middle", "Low")) +coord_sf(crs='ESRI:54030', expand=FALSE) +labs(fill ="Income Level", title ="World Map, Country Income Levels",caption="Source: R Natural Earth Data") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# Let's do a choropleth map for a continuous variable, like GDPw |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(aes(fill=gdp_md), color="black", linewidth=.1) +scale_fill_distiller(palette ="YlGnBu", direction=1, labels=dollar) +coord_sf(crs='ESRI:54030', expand=FALSE) +labs(fill ="GDP", title ="World Map, GDP",caption="Source: R Natural Earth Data") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# Let's merge outside dataset: VDem (Varieties of Democracy). # Let's look at degree of political, rhetorical attacks (by politicians and elites) # on the judiciary. # Merge in VDem data; I have already created a country id variable ("geounit") that is # common between data objects.v <-read_dta("vdem.dta")# Merge vdem with world datavw <-left_join(w, v)# World map, Robinsonvw |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(color="black", linewidth=0.1, aes(fill =as.factor(v2jupoatck_ord))) +coord_sf(crs='ESRI:54030', expand=FALSE) +scale_fill_brewer(type ="seq", palette ="YlGnBu", na.translate=FALSE,labels=c("Daily/weekly", "Every month", "More than once", "Rare", "None")) +labs(fill ="Attacks", title ="Government Attacks on the Judiciary, 2021",subtitle="Based on VDem's v2jupoatck_ord Variable",caption="Source: VDem") +theme_map() +theme(plot.title=element_text(face="bold", size=15, hjust=.5), plot.subtitle=element_text(size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =10),legend.title =element_text(size =10) )
# Switch direction of scale - darker colors = more attacksvw |>filter(type !="Dependency", type !="Disputed", type !="Indeterminate") |>ggplot() +geom_sf(color="black", linewidth=0.1, aes(fill =as.factor(v2jupoatck_ord))) +coord_sf(crs='ESRI:54030', expand=FALSE) +scale_fill_brewer(type ="seq", palette ="YlGnBu", na.translate=FALSE,labels=c("Daily/weekly", "Every month", "More than once", "Rare", "None"), direction =-1) +labs(fill ="Attacks", title ="Government Attacks on the Judiciary, 2021",subtitle="Based on VDem's v2jupoatck_ord Variable",caption="Source: VDem") +theme_map() +theme(plot.title=element_text(face="bold", size=15, hjust=.5), plot.subtitle=element_text(size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =10),legend.title =element_text(size =10) )
# Save as .png fileggsave("attacks_ord_merc.png", bg="white", w=9, h=4)###################### ###################### ###################### ######################################### Other map options in Tigris (US Maps) ################################################### ###################### ###################### #################### Zoom in on one state: NYnyco <-counties("NY", cb =TRUE, progress_bar=FALSE)# Quick mapnyco |>ggplot() +geom_sf() +theme_void()
# Merge county data to bring in incomenymerge <-left_join(nyco, codata)# Choropleth map for income for NY countiesnymerge |>ggplot() +geom_sf(aes(fill = hh_income), color ="black", linewidth=.1) +theme_void() +scale_fill_distiller(palette ="YlGnBu", direction =1, labels=dollar) +labs(fill ="Household Income",title ="Household Income by County, New York State",caption ="Source: Healy County Data") +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size =10),legend.text =element_text(size =8))
# Tons of other options in Tigris: counties, cities, census tracts and blocks, # school districtsdct <-tracts("DC", cb=TRUE, progress_bar=FALSE)dct |>ggplot() +geom_sf(fill="lightblue", linewidth=.2) +theme_void()
############################################################################################################### Other options in RNaturalEarth ######################################################################################################### We can also zoom in on continents; tell rnaturalearth to pull up South Americasa <-ne_countries(scale ="medium", returnclass ="sf", continent ="South America")# South Americasa |>ggplot() +geom_sf(aes(fill=income_grp), color="black", linewidth=.1) +scale_fill_brewer(palette ="YlGnBu", direction=-1, labels=c("High OECD", "High non-OECD", "Upper middle", "Lower middle", "Low")) +coord_sf(expand=FALSE) +labs(fill ="Income Level", title ="Country Income Levels in South America",caption="Source: R Natural Earth Data") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# SA GDPsa |>ggplot() +geom_sf(aes(fill=gdp_md), color="black", linewidth=.1) +scale_fill_distiller(palette ="YlGnBu", direction=1, labels=dollar) +coord_sf(expand=FALSE) +labs(fill ="GDP", title ="GDP, South America",caption="Source: R Natural Earth Data") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.caption =element_text(size=10), legend.title =element_text(size=10),legend.text =element_text(size =8))
# South America VDemvw |>filter(continent=="South America") |>ggplot() +geom_sf(color="black", linewidth=0.1, aes(fill =as.factor(v2jupoatck_ord))) +coord_sf(expand=FALSE) +scale_fill_brewer(type ="seq", palette ="YlGnBu", na.translate=FALSE,labels=c("Daily/weekly", "Every month", "More than once", "Rare", "None"),direction=-1) +labs(fill ="Attacks", title ="Government Attacks on the Judiciary, South America, 2021",subtitle="Based on VDem's v2jupoatck_ord Variable",caption="Source: VDem") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.subtitle=element_text(size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =10),legend.title =element_text(size =10) )
# South America; add country labelsvw |>filter(continent=="South America") |>ggplot() +geom_sf(color="black", linewidth=0.1, aes(fill =as.factor(v2jupoatck_ord))) +coord_sf(expand=FALSE) +scale_fill_brewer(type ="seq", palette ="YlGnBu", na.translate=FALSE,labels=c("Daily/weekly", "Every month", "More than once", "Rare", "None"),direction=-1) +geom_label_repel(aes(label = abbrev, geometry = geometry),stat ="sf_coordinates",min.segment.length =0,size=3,point.padding =NA,segment.color ="grey20") +labs(fill ="Attacks", title ="Government Attacks on the Judiciary, South America, 2021",subtitle="Based on VDem's v2jupoatck_ord Variable",caption="Source: VDem") +theme_map() +theme(plot.title=element_text(face="bold", size=12, hjust=.5), plot.subtitle=element_text(size=12, hjust=.5), plot.caption =element_text(size=10), legend.text =element_text(size =10),legend.title =element_text(size =10) )
############################################################################################################ 3. National and Subnational Maps Using Tigris ######################################### and Tidycensus for Analyzing Census Data ################################################################################################################ Go-to source for tidycensus: https://walker-data.com/census-r/index.html# Analyzing US Census Data: Methods, Maps, and Models in R, by Kyle Walker# See in particular Chs. 2-6# To use tidycensus, you need to register for a "key" online from the Census Bureau# Free and easy to do. The first time you load the package, it will give you instructions# for getting a key.# Let's start with educational attainment from 2020 ACS (5 year)# Go to data.census.gov to look up Census variables. # Call up data (and geographical data) from tidycensus # National map, subdivided by state; remember, shift geometry for national map, # get rid of PRstateba <-get_acs(geography ="state",variables ="B06009_005",summary_var ="B06009_001",year =2020,geometry =TRUE,resolution ="20m") |>shift_geometry() |>mutate(propba=estimate/summary_est) |>mutate(pctba=100*estimate/summary_est) |>filter(NAME !="Puerto Rico")# Note: _001 is "total", while _005 is "B.A. degree"stateba |>ggplot(aes(fill = propba)) +geom_sf(color ="black", linewidth=.05) +scale_fill_distiller(palette ="YlGnBu", direction =1, label=percent) +theme_void() +coord_sf(expand=FALSE) +labs(fill ="% B.A.",title ="Percent with B.A.",caption ="Source: 2020 ACS, 5-year") +theme(plot.title=element_text(face="bold", size=12, hjust =0.5), plot.caption =element_text(size=10),legend.text =element_text(size =8))
# Interactive map with mapview (see also tmap and leaflet)mapview(stateba, zcol="pctba")